1 Introduction

Here, we will cluster the cells of the patient with id “019”.

2 Pre-processing

2.1 Load packages

library(Seurat)
library(SeuratWrappers)
library(harmony)
library(tidyverse)

2.2 Define parameters

# Paths
path_to_obj <- here::here("results/R_objects/4.seurat_leukemic.rds")
path_to_save <- here::here("results/R_objects/5.seurat_clustered_19.rds")


# Colors
color_palette <- c("black", "gray", "red", "yellow", "violet", "green4",
                   "blue", "mediumorchid2", "coral2", "blueviolet",
                   "indianred4", "deepskyblue1", "dimgray", "deeppink1",
                   "green", "lightgray", "hotpink1")


# Source functions
source(here::here("bin/utils.R"))


# Thresholds
max_pct_mt <- 18

2.3 Load data

seurat <- readRDS(path_to_obj)

3 Subset and reprocess

Of note, everytime that we filter out a specific population, the set of highly variable genes (HVG) and the axis of variability (PCs) changes. Thus, we need to rerun the main analysis steps iteratively.

First, we focus on the patient studied in this notebook:

seurat <- subset(seurat, donor_id == "19")
seurat <- process_seurat(seurat, dims = 1:20)
DimPlot(seurat, group.by = "subproject")

As we can see, there is a large batch effect between the two experiments we conducted BCLLATLAS_10 and BCLLATLAS_29. As we know, in BCLLATLAS_10 we obtain single-cell transcriptomes for serial clinical samples of 4 donors. However, as we observed that some points were underrepresented (had less cells), we performed scRNA-seq of specific cases (BCLLATLAS_29). Thus, we will initially work with BCLLATLAS_10, and use BCLLATLAS_29 later as a validation.

seurat <- subset(seurat, subproject == "BCLLATLAS_10")
seurat <- process_seurat(seurat, dims = 1:20)
DimPlot(seurat, group.by = "subproject")

FeaturePlot(seurat, "pct_mt") +
  scale_color_viridis_c(option = "magma")

FeaturePlot(seurat, "nFeature_RNA") +
  scale_color_viridis_c(option = "magma")

We see a subpopulation with characteristics of low-quality cells: high mitochondrial expression and low number of detected features (genes). Hence, let us exclude it from the dataset:

seurat <- FindNeighbors(seurat, reduction = "pca", dims = 1:20)
seurat <- FindClusters(seurat, resolution = 0.3)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 8286
## Number of edges: 274795
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8626
## Number of communities: 5
## Elapsed time: 1 seconds
DimPlot(seurat)

seurat <- subset(seurat, seurat_clusters != "3")
seurat <- subset(seurat, pct_mt < max_pct_mt)
seurat <- process_seurat(seurat, dims = 1:20)
DimPlot(seurat)

DimPlot(seurat, group.by = "time_point")

4 Cluster

seurat <- FindNeighbors(seurat, reduction = "pca", dims = 1:20)
seurat <- FindClusters(seurat, resolution = 0.6)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 7435
## Number of edges: 248190
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.7749
## Number of communities: 9
## Elapsed time: 1 seconds
DimPlot(seurat)

# Is cluster 7 composed of poor-quality cells?
markers_7 <- FindMarkers(
  seurat, ident.1 = "7",
  only.pos = TRUE,
  logfc.threshold = 0.3
)
DT::datatable(markers_7)

As we can see, the fold-changes are very small and the cells are scattered all over the place. Thus, we will remove cluster 7 and recluster:

seurat <- subset(seurat, seurat_clusters != "7")
seurat <- FindNeighbors(seurat, reduction = "pca", dims = 1:20)
seurat <- FindClusters(seurat, resolution = 0.6)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 7284
## Number of edges: 245726
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.7773
## Number of communities: 8
## Elapsed time: 1 seconds
seurat$final_clusters <- seurat$seurat_clusters
Idents(seurat) <- "final_clusters"
DimPlot(seurat)

DimPlot(seurat, group.by = "time_point")

5 Save

saveRDS(seurat, path_to_save)

6 Session Info

sessionInfo()
## R version 4.0.4 (2021-02-15)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 20.04.2 LTS
## 
## Matrix products: default
## BLAS:   /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.9.0
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.9.0
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C               LC_TIME=es_ES.UTF-8        LC_COLLATE=en_US.UTF-8     LC_MONETARY=es_ES.UTF-8    LC_MESSAGES=en_US.UTF-8    LC_PAPER=es_ES.UTF-8       LC_NAME=C                  LC_ADDRESS=C               LC_TELEPHONE=C             LC_MEASUREMENT=es_ES.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] forcats_0.5.1        stringr_1.4.0        dplyr_1.0.6          purrr_0.3.4          readr_1.4.0          tidyr_1.1.3          tibble_3.1.2         ggplot2_3.3.3        tidyverse_1.3.1      harmony_1.0          Rcpp_1.0.6           SeuratWrappers_0.3.0 SeuratObject_4.0.2   Seurat_4.0.3         BiocStyle_2.18.1    
## 
## loaded via a namespace (and not attached):
##   [1] readxl_1.3.1          backports_1.2.1       plyr_1.8.6            igraph_1.2.6          lazyeval_0.2.2        splines_4.0.4         crosstalk_1.1.1       listenv_0.8.0         scattermore_0.7       digest_0.6.27         htmltools_0.5.1.1     fansi_0.5.0           magrittr_2.0.1        tensor_1.5            cluster_2.1.1         ROCR_1.0-11           limma_3.46.0          remotes_2.4.0         globals_0.14.0        modelr_0.1.8          matrixStats_0.59.0    spatstat.sparse_2.0-0 colorspace_2.0-1      rvest_1.0.0           ggrepel_0.9.1         haven_2.4.1           xfun_0.23             crayon_1.4.1          jsonlite_1.7.2        spatstat.data_2.1-0   survival_3.2-10       zoo_1.8-9             glue_1.4.2            polyclip_1.10-0       gtable_0.3.0          leiden_0.3.8          future.apply_1.7.0    abind_1.4-5           scales_1.1.1          DBI_1.1.1             miniUI_0.1.1.1        viridisLite_0.4.0     xtable_1.8-4          reticulate_1.20       spatstat.core_2.1-2   rsvd_1.0.5            DT_0.18               htmlwidgets_1.5.3     httr_1.4.2            RColorBrewer_1.1-2    ellipsis_0.3.2        ica_1.0-2             farver_2.1.0          pkgconfig_2.0.3      
##  [55] sass_0.4.0            uwot_0.1.10           dbplyr_2.1.1          deldir_0.2-10         here_1.0.1            utf8_1.2.1            labeling_0.4.2        tidyselect_1.1.1      rlang_0.4.11          reshape2_1.4.4        later_1.2.0           munsell_0.5.0         cellranger_1.1.0      tools_4.0.4           cli_2.5.0             generics_0.1.0        broom_0.7.7           ggridges_0.5.3        evaluate_0.14         fastmap_1.1.0         yaml_2.2.1            goftest_1.2-2         knitr_1.33            fs_1.5.0              fitdistrplus_1.1-5    RANN_2.6.1            pbapply_1.4-3         future_1.21.0         nlme_3.1-152          mime_0.10             xml2_1.3.2            compiler_4.0.4        rstudioapi_0.13       plotly_4.9.4          png_0.1-7             spatstat.utils_2.2-0  reprex_2.0.0          bslib_0.2.5.1         stringi_1.6.2         highr_0.9             RSpectra_0.16-0       lattice_0.20-41       Matrix_1.3-4          vctrs_0.3.8           pillar_1.6.1          lifecycle_1.0.0       BiocManager_1.30.15   spatstat.geom_2.1-0   lmtest_0.9-38         jquerylib_0.1.4       RcppAnnoy_0.0.18      data.table_1.14.0     cowplot_1.1.1         irlba_2.3.3          
## [109] httpuv_1.6.1          patchwork_1.1.1       R6_2.5.0              bookdown_0.22         promises_1.2.0.1      KernSmooth_2.23-18    gridExtra_2.3         parallelly_1.26.0     codetools_0.2-18      MASS_7.3-53.1         assertthat_0.2.1      rprojroot_2.0.2       withr_2.4.2           sctransform_0.3.2     mgcv_1.8-36           parallel_4.0.4        hms_1.1.0             grid_4.0.4            rpart_4.1-15          rmarkdown_2.8         Rtsne_0.15            shiny_1.6.0           lubridate_1.7.10